### This R script plots repayment rates, i.e. Figure 4 Panels (c) and (d)

##########################################################################################
## Load packages (note: install first if not yet installed)
##########################################################################################

#install.packages("tidyverse")
#install.packages("ggplot2") 
#install.packages("foreign")
#install.packages("doBy")
#install.packages("data.table")

library(tidyverse)
library(ggplot2)
library(foreign)
library(doBy)
library(data.table)

# Unused but keep in case
#install.packages("gridExtra")
#install.packages("ggpubr")
#library(gridExtra)
#library(ggpubr)

##########################################################################################
## Run data work
##########################################################################################

# Root folder of the replication package. Defaults to the original author's
# Dropbox location; set the FENIX_REPLICATION_ROOT environment variable to run
# the script on another machine without editing this file.
replication_root <- Sys.getenv(
  "FENIX_REPLICATION_ROOT",
  unset = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication"
)

# Load dataset
repay_main_extend <- read.dta(
  file.path(
    replication_root, "data", "repayment",
    "fenix_repay_extend_07172020_strict_rep.dta"
  )
)

# Keep rows with loandayselapsed of at most 200 days
# (subset() treats a missing condition as FALSE, so NA days are dropped too)
repay_main_extend <- subset(repay_main_extend, loandayselapsed <= 200)

# Turn completeloan into a 0/1 indicator: "No" becomes 0, every other
# non-missing value becomes 1, and NA stays NA
repay_main_extend <- repay_main_extend %>%
  mutate(completeloan = ifelse(completeloan == "No", 0, 1))

# Make treatment assignment labels simpler.
# read.dta() can return value-labelled Stata variables as factors; relabel on
# a character copy so that writing a new label can never yield NA through an
# "invalid factor level" warning.
repay_main_extend$treatmenttype_sh <-
  as.character(repay_main_extend$treatmenttype_sh)
arm_labels <- c(
  "R T1-L" = "Secured",
  "R T1-U" = "Surprise Unsecured",
  "R T2-U" = "Unsecured",
  "R T3" = "Choice",
  "R C" = "Control"
)
# Only rows carrying one of the known codes are rewritten; anything else
# (including NA) is left untouched.
known_arm <- repay_main_extend$treatmenttype_sh %in% names(arm_labels)
repay_main_extend$treatmenttype_sh[known_arm] <-
  unname(arm_labels[repay_main_extend$treatmenttype_sh[known_arm]])

# Ordered-factor copy of the treatment label (the level order fixes the order
# of groups in the graphs and legends)
repay_main_extend$treatmenttype_fac <- ordered(
  repay_main_extend$treatmenttype_sh,
  levels = c("Secured", "Surprise Unsecured", "Unsecured", "Choice", "Control")
)

# Versions without the control arm, and without control and choice arms.
# which() drops rows whose label is NA, since the comparison is NA there.
keep_noctrl <- which(repay_main_extend$treatmenttype_sh != "Control")
repay_main_extend_noctrl <- repay_main_extend[keep_noctrl, ]
keep_nochoice <- which(repay_main_extend_noctrl$treatmenttype_sh != "Choice")
repay_main_extend_noctrlnochoice <- repay_main_extend_noctrl[keep_nochoice, ]

# Summary dataset: for each combination of loandayselapsed and treatment arm,
# the mean and standard deviation of completeloan
# (output columns completeloan.mean and completeloan.sd)
lcr_data <- summaryBy(
  completeloan ~ loandayselapsed + treatmenttype_fac,
  FUN = c(mean, sd),
  data = repay_main_extend_noctrlnochoice
)

# I also want differences between completion rates.
# Extract each arm's mean series, selecting columns by name rather than by
# position so a change in the summary layout cannot pick up the wrong column.
secured_means <- subset(
  lcr_data, treatmenttype_fac == "Secured",
  select = c(loandayselapsed, completeloan.mean)
)
names(secured_means)[names(secured_means) == "completeloan.mean"] <-
  "mean.locked"

surprise_means <- subset(
  lcr_data, treatmenttype_fac == "Surprise Unsecured",
  select = c(loandayselapsed, completeloan.mean)
)
names(surprise_means)[names(surprise_means) == "completeloan.mean"] <-
  "mean.surpriseunlocked"

unsecured_means <- subset(
  lcr_data, treatmenttype_fac == "Unsecured",
  select = c(loandayselapsed, completeloan.mean)
)
names(unsecured_means)[names(unsecured_means) == "completeloan.mean"] <-
  "mean.unlocked"

# Line the three series up on loandayselapsed. Matching on the key (instead
# of pasting columns side by side) guarantees every difference compares the
# same day; a day missing from any arm is dropped rather than silently paired
# with another day's mean. merge() returns rows sorted by loandayselapsed.
means_by_day <- merge(secured_means, surprise_means, by = "loandayselapsed")
means_by_day <- merge(means_by_day, unsecured_means, by = "loandayselapsed")
n_days <- nrow(means_by_day)

# Take differences and stack them in long form: one block of rows per type
lcr_data2 <- rbind(
  data.table(
    loandayselapsed = means_by_day$loandayselapsed,
    diff = means_by_day$mean.locked - means_by_day$mean.unlocked,
    type = rep("Total Effect", n_days)
  ),
  data.table(
    loandayselapsed = means_by_day$loandayselapsed,
    diff = means_by_day$mean.locked - means_by_day$mean.surpriseunlocked,
    type = rep("Moral Hazard", n_days)
  ),
  data.table(
    loandayselapsed = means_by_day$loandayselapsed,
    diff = means_by_day$mean.surpriseunlocked - means_by_day$mean.unlocked,
    type = rep("Selection", n_days)
  )
)

# Factorize the type, ordered as Total Effect, Moral Hazard, Selection
lcr_data2$type_fac <- ordered(
  lcr_data2$type,
  levels = c("Total Effect", "Moral Hazard", "Selection")
)



############################

# Graph 1: completion rates
# One line per treatment arm, showing completeloan.mean against
# loandayselapsed.
myplot <- ggplot() +
  # NOTE: `size` for lines is deprecated from ggplot2 3.4.0 in favour of
  # `linewidth`; it is kept so the script still runs on older installs.
  geom_line(
    data = lcr_data,
    aes(
      x = loandayselapsed,
      y = completeloan.mean,
      group = treatmenttype_fac,
      color = treatmenttype_fac
    ),
    size = 0.5
  ) +
  # Unnamed colours are matched to the groups in factor-level order
  scale_color_manual(values = c("red", "orange", "green")) +
  xlab("Days elapsed since loan creation") +
  ylab("Fraction completed") +
  # Scale limits drop observations outside the displayed ranges
  scale_y_continuous(
    breaks = c(0.21, 0.32, 0.43, 0.54, 0.65),
    limits = c(0.21, .65)
  ) +
  scale_x_continuous(breaks = c(100, 150, 200), limits = c(100, 200)) +
  labs(color = "Assigned treatment group") +
  theme(
    # Legend sits inside the panel, anchored to the bottom-right corner
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # The legend title set through labs() above is suppressed here
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )

# Figure 4 Panel (c)
# Root folder of the replication package. Defaults to the original author's
# Dropbox location; set FENIX_REPLICATION_ROOT to write somewhere else.
replication_root <- Sys.getenv(
  "FENIX_REPLICATION_ROOT",
  unset = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication"
)
# Pass the plot explicitly instead of relying on last_plot(), so the saved
# figure cannot change if another plot is created or modified in between.
ggsave(
  filename = file.path(replication_root, "figures", "loancompleteratesgrey.pdf"),
  plot = myplot,
  width = 4,
  height = 3
)

# Graph 2: difference in completion rates
# One line per effect type, showing diff against loandayselapsed.
myplot2 <- ggplot() +
  # NOTE: `size` for lines is deprecated from ggplot2 3.4.0 in favour of
  # `linewidth`; it is kept so the script still runs on older installs.
  geom_line(
    data = lcr_data2,
    aes(
      x = loandayselapsed,
      y = diff,
      group = type_fac,
      color = type_fac
    ),
    size = 0.5
  ) +
  # Unnamed colours are matched to the groups in factor-level order
  scale_color_manual(values = c("red", "black", "blue")) +
  xlab("Days elapsed since loan creation") +
  ylab("Difference in fraction completed") +
  # Scale limits drop observations outside the displayed ranges
  scale_y_continuous(
    breaks = c(-0.03, 0.01, 0.05, 0.09, 0.13, 0.17),
    limits = c(-0.03, .17)
  ) +
  scale_x_continuous(breaks = c(100, 150, 200), limits = c(100, 200)) +
  labs(color = "Type of effect") +
  theme(
    # Legend sits inside the panel, anchored to the bottom-right corner
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # The legend title set through labs() above is suppressed here
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )

# Figure 4 Panel (d)
# Root folder of the replication package. Defaults to the original author's
# Dropbox location; set FENIX_REPLICATION_ROOT to write somewhere else.
replication_root <- Sys.getenv(
  "FENIX_REPLICATION_ROOT",
  unset = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication"
)
# Pass the plot explicitly instead of relying on last_plot(), so the saved
# figure cannot change if another plot is created or modified in between.
ggsave(
  filename = file.path(replication_root, "figures", "diff_complete.pdf"),
  plot = myplot2,
  width = 4,
  height = 3
)
